capture log close

*****************************************************************
* Do-file: create major-level variables from the micro census   *
* "A replication of Ochsenfeld (2016), Patzina & Toussaint"     *
*****************************************************************

version 13
set more off
clear

* Global path: folder holding the micro census datasets that are imported
global data "Z:\Projects\p000165_DUA_3627\Gender Segregation\Data"

* Global path: folder the do-files are read from
global file "Y:\"

* Global path: folder where datasets and log files are saved
global project "Y:\"

* Work inside the project folder and (re)start the log file there
cd "${project}"
log using census_variables, replace


**********************************
* information from German census *
**********************************

* Pool the datasets from 2011, 2012 and 2013 into mzpooled.dta.
*
* id_n is a record identifier that must be unique across the pooled waves
* (it is used as the sampling unit when the survey design is declared).
* It is built as wave * 10,000,000 + row number and stored as double, so
* identifiers of different waves cannot collide no matter how many records
* a wave contains. A fixed per-wave offset of 500,000 would produce
* duplicate ids as soon as one wave holds more than 500,000 rows.
*
* Every wave is reduced to the same set of census variables and tagged
* with its survey year in the variable cohort.

* 2013
use "${data}\mz13.dta", clear
gen double id_n = 2013 * 10000000 + _n
keep id_n EF436 EF134 EF313 EF46 EF1 EF314 EF44 EF310 EF312 EF77 EF129 EF201 EF866 EF952
gen cohort = 2013
save "mzpooled.dta", replace

* 2012
use "${data}\mz12.dta", clear
gen double id_n = 2012 * 10000000 + _n
keep id_n EF436 EF134 EF313 EF46 EF1 EF314 EF44 EF310 EF312 EF77 EF129 EF201 EF866 EF952
gen cohort = 2012
append using "mzpooled.dta"
save "${project}\mzpooled.dta", replace

* 2011
use "${data}\mz11.dta", clear
gen double id_n = 2011 * 10000000 + _n
keep id_n EF436 EF134 EF313 EF46 EF1 EF314 EF44 EF310 EF312 EF77 EF129 EF201 EF866 EF952
gen cohort = 2011
* 2011 wave only: the negative codes -5, -3 and -2 of EF201 and EF310 are
* set to 0 before the waves are combined
* NOTE(review): presumably harmonises the 2011 coding with 2012/2013 - confirm
recode EF201 (-5 -3 -2 = 0)
recode EF310 (-5 -3 -2 = 0)
append using "mzpooled.dta"
save "${project}\mzpooled.dta", replace


*** person characteristics (for mincer regression) ***

* wages (following Glocker/Storck 2014):
* every income bracket code of EF436 is replaced by a monthly amount,
* codes 50, 90 and 99 become missing
recode EF436 ///
	(50 90 99 = .) ///
	(1 = 52)     (2 = 231)    (3 = 404)    (4 = 620)   ///
	(5 = 818)    (6 = 1023)   (7 = 1227)   (8 = 1437)  ///
	(9 = 1627)   (10 = 1884)  (11 = 2171)  (12 = 2475) ///
	(13 = 2777)  (14 = 3065)  (15 = 3438)  (16 = 3865) ///
	(17 = 4284)  (18 = 4836)  (19 = 5280)  (20 = 5864) ///
	(21 = 6764)  (22 = 9005)  (23 = 12886) (24 = 21778) ///
	, generate(inc_mon)

* weekly hours worked: codes 99, 0, -3, -2 become missing,
* values from 80 upwards are top coded at 80 hrs/week
recode EF134 (99 0 -3 -2 = .) (80/150 = 80), generate(arbzt_woch)

* hourly wage = monthly income over four weeks of weekly hours, and its log
generate wage   = inc_mon / (4*arbzt_woch)
generate lnwage = log(wage)

* gender: 1 = female (EF46 == 2), 0 = EF46 == 1
recode EF46 (1 = 0) (2 = 1), generate(frau)

* east germany: EF1 codes 11-16 = 1, codes 1-10 = 0
recode EF1 (1/10 = 0) (11/16 = 1), generate(ost)

* duration since receipt of degree (in years)
* degree years that are non-positive or later than 2009 are set to missing
* NOTE(review): the pooled waves are 2011-2013, so degrees obtained after
* 2009 are blanked here - confirm that this cut-off is intended
replace EF314 = . if EF314 > 2009 | EF314 <= 0
generate exper  = cohort - EF314
generate exper2 = exper^2

* age and age squared
generate age  = EF44
generate age2 = age^2

* college degree: EF312 codes 6-10 count as college degree (hsabs = 1),
* codes 1-5 and 11 as none; only college graduates are kept
recode EF312 (-5 -3 0 99 = .) (1/5 = 0) (6/10 = 1) (11 = 0), generate(hsabs)
keep if hsabs == 1

* college major
	* the census scheme is (almost) identical with the HIS scheme, but the
	* numeric coding is not; first step: translate census codes into HIS codes
	* Geosciences/Geography --> Geoscience ; Regional science --> Geography
	* Teachers --> 500
	* census codes that are not listed in any rule keep their value

recode EF313 ///
	(-5 -3 0 98 99 = .) ///
	(2 = 2) (3 = 3) (5 = 4) (6 = 5) (7 8 = 6) (9 = 7) (10 = 8) (11 = 9) ///
	(12 = 10) (13 = 11) (14 = 12) (15 = 13) (16 = 14) (17 = 15) (18 = 16) ///
	(25 = 22) (26 = 1) (28 = 25) (29 = 44) (31 = 28) (32 = 29) ///
	(33 34 35 36 37 = 30) ///
	(38 = 31) (40 41 = 37) (42 = 38) (43 = 39) (44 = 40) (45 = 41) ///
	(46 = 42) (47 = 43) (48 = 49) ///
	(49 = 50) (50 = 51) (51 = 58) (52 = 57) (53 = 59) (54 55 = 60) ///
	(56 = 36) (57 = 62) (58 59 = 63) ///
	(60 61 = 64) (62 = 40) (63 = 65) (66 = 66) (67 = 68) (72 73 = 74) ///
	(74 = 77) (75 = 78) (76 = 76) (92 = 27) ///
	(4 64 65 68 69 70 71 77 78 80 81 82 84 85 86 87 88 89 90 91 93 94 95 = .) ///
	(19 20 21 22 23 24 = 500) ///
	, generate(B1ber1ab1)

* Merge small but similar majors
* (no target code of one rule is the source code of another rule, so all
*  merges can be applied in a single pass)
recode B1ber1ab1 ///
	(3 = 2)                    /// cath. theology --> prot. theology
	(4 = 5)                    /// philosophy --> history
	(1 7 8 10 11 12 13 = 14)   /// non-german philologies and cultural sciences --> cultural sciences
	(17 = 16)                  /// special needs pedagogy --> pedagogy
	(23 25 = 26)               /// social and economic studies, political science --> social sciences
	(62 = 43)                  /// mining and metallurgy --> geosciences
	(67 57 = 44)               /// urban and regional planning --> geography
	(50 = 49)                  /// dentistry --> medical sciences
	(59 = 58)                  /// forestry --> agronomy
	(76 = 66)                  /// design --> architecture and interior design
	(65 69 = 68)               /// surveying and mapping, traffic engineering --> civil engineering
	(75 77 78 = 74)            /// fine arts, performing arts, music, musicology --> art, art history
	(500 = 80)                 //  state teacher

tabulate B1ber1ab1

* Only the fields with sufficient observations in NEPS are retained
keep if inlist(B1ber1ab1, 5, 14, 15, 16, 26, 27, 28, 30, 31, 37, 38, 39, ///
	40, 42, 44, 49, 58, 63, 64, 66, 68, 74, 80)

* full university vs. univ. of applied sciences
* (EF312 codes 9-10 = 1, codes 1-8 and 11 = 0)
recode EF312 (-5 -3 0 99=.)(9/10=1)(1/8=0)(11=0), gen(univ)

* full abitur or not (EF310 code 5 = 1, codes 1-4 and 6 = 0)
recode EF310 (-5 -3 0 9=.)(5=1)(1 2 3 4 6=0), gen(abi)

* phd (EF312 code 10 = 1, all other valid codes = 0)
recode EF312 (-5 -3 0 99=.)(10=1)(1/9 11=0), gen(promo)
replace promo = 0 if inlist(B1ber1ab1, 49, 50) 	/* MDs and dentists do not hold real PhDs */

* sample: one indicator per census wave
* (cohort is set for every record, so the logical expression is never missing)
gen coh2011 = (cohort == 2011)
gen coh2012 = (cohort == 2012)
gen coh2013 = (cohort == 2013)

* full-time indicator: 35 or more hours per week
* Stata treats missing as larger than any number, so a plain
* "arbzt_woch >= 35" would flag persons with unknown hours as full-time;
* the indicator is therefore left missing when hours are missing
gen fulltime = (arbzt_woch >= 35) if !missing(arbzt_woch)


*** define sample ***
keep if hsabs == 1 								/* college graduates only 				*/
drop if wage < 2 								/* implausibly low wages 				*/
drop if arbzt_woch < 20 						/* less than 20 hrs per week 			*/
drop if EF77 != 1 								/* no paid employment 					*/
drop if !inrange(EF314 - (cohort - age), 18, 35) 	/* age at receipt of degree not between 18 and 35 (or unknown) */
drop if EF201 > 0 								/* more than one job 					*/
keep if inrange(age, 25, 55) 					/* keep ages 25 to 55 only 				*/

* listwise deletion: drop every record with a missing value on any model variable
egen miss_ind = rowmiss(lnwage cohort frau ost exper exper2 age age2 B1ber1ab1 univ abi promo)
keep if miss_ind == 0

* one dummy per remaining major (fachdum1, fachdum2, ...);
* the number of majors is stored in the global fachnr
quietly tabulate B1ber1ab1, generate(fachdum)
global fachnr = r(r)


***************************************************
***		compute major level characteristics		***
***************************************************

/*** results matrix 1 (fstats): one row per major, 12 columns
	 1 - major id
	 2 - major size
	 3 - proportion female (mean)
	 4 - proportion female (se)
 human capital Theory
	 5 - worklife index (mean)
	 6 - worklife index (se)
	 7 - hrs worked if full time job (mean)
	 8 - hrs worked if full time job (se)
 anticipation of discrimination
	 9  - perceived discrimination (mean of women's responses)
	10  - perceived discrimination (se)
 gender roles: breadwinner 
	11 - wage level
	12 - wage level (se)

 Only columns 1, 2, 7, 8, 11 and 12 are filled by this do-file;
 the remaining columns stay missing here.
***/

matrix fstats = J($fachnr , 12 , .)
matrix colnames fstats = 1_B1ber1ab1 2_obs 3_%frau 4_%frau_se 5_worklife 6_worklife_se ///
	7_ftarbzt 8_ftarbzt_se 9_%sdiscr 10_%sdiscr_se 11_intercpt 12_intercpt_se

* sampling weights
* the record identifier is spelled out in full (id_n): the short form "id"
* only resolves through variable-name abbreviation and fails when
* abbreviation is switched off or another variable starting with "id" exists
svyset id_n [pw=EF952]

* per major: id and unweighted size (columns 1-2), weighted mean and se of
* weekly hours among those working 35 hours or more (columns 7-8)
quietly forvalues i = 1/$fachnr {
	summarize B1ber1ab1 if (fachdum`i'==1)
		matrix fstats[`i', 1] = r(mean)
		matrix fstats[`i', 2] = r(N)
	svy: mean arbzt_woch if (fachdum`i'==1 & arbzt_woch>=35)
		matrix fstats[`i', 7] = _b[arbzt_woch]
		matrix fstats[`i', 8] = _se[arbzt_woch]
	}

* wage regression
* majors: every field dummy except no. 8 (reference category: econ. science)
global fdum = "fachdum1-fachdum7 fachdum9-fachdum23"
* controls: productivity related characteristics
global controls = "ost exper exper2 age age2 abi promo univ coh2011 coh2012"

regress lnwage $fdum $controls [pw=EF952]
estimates store mincer


* plug estimates into results matrix
* column 11: exp(b) - 1 of the major dummy, column 12: exp(se) - 1
* NOTE(review): column 12 applies the coefficient transformation to the
* standard error; a delta-method standard error would be exp(b) * se.
* Kept as is - confirm what the downstream analysis expects.
local refcat 8												/* refcat: econ sciences=8 	*/
forvalues i = 1/$fachnr {
	if `i' == `refcat' {
		* reference category: wage level fixed at 0, its se stays missing
		matrix fstats[`i', 11] = 0
		continue
	}
	matrix fstats[`i', 11] = (exp(_b[fachdum`i']))-1		/* intercept b  */
	matrix fstats[`i', 12] = (exp(_se[fachdum`i']))-1		/* intercept se */
}
matrix list fstats, format(%8.3g)



*** reformat for transport to NEPS server ***
* turn the results matrix into the variables fstats1-fstats12 and
* discard all person-level variables
svmat fstats
keep fstats*

* NOTE(review): the value label stb is not defined in this do-file -
* presumably it is defined where fstats.dta is used; confirm
label values fstats1 stb
label variable fstats2 "Observations"
label variable fstats5 "work-life satisfaction"
label variable fstats7 "hrs. worked(ft)"
label variable fstats9 "discrimination vs. women"
label variable fstats11 "intercept wage"

****
* the matrix has one row per major, so only the first $fachnr
* observations carry results; all later rows are dropped
keep if _n <= $fachnr
save "${project}\fstats.dta", replace

*** END OF DOFILE ***
log close
exit, clear


